###############################################################
# R Script for: Boilerplate in International Trade Agreements #
#      Claire Peacock, Karolina Milewicz, Duncan Snidal       #
#         International Studies Quarterly                     #
###############################################################

# May 22, 2019

# Note: script last tested using R version 3.5.2 (2018-12-20) -- "Eggshell Igloo"

###########################################################################################
#                 (Install if required) and load the following libraries                  #
###########################################################################################
#install.packages("lolog") 
library(lolog) # script last tested on version 1.2
# install.packages("network")
library(network) # script last tested on version 1.14-377
# install.packages("dotwhisker")
library(dotwhisker) # script last tested on version 0.50
# install.packages("magrittr")
library(magrittr) # script last tested on version 1.5
# install.packages("gridExtra")
library(gridExtra) # script last tested on version 2.3
# install.packages("quanteda")
library(quanteda) # script last tested on version 1.4.1
# install.packages("igraph") 
library(igraph) # script last tested on version 1.2.4


###########################################################################################
#                                Load the Data                                            #
###########################################################################################
# Set your working directory (should be same folder as where you have save the data)

# setwd("/Users/myname/myfolder") # edit to match your file path
setwd("/Users/clairepeacock/Sync/Boilerplating_analysis/Replication Files")
load("boilerplate_replication.RData") 


###########################################################################################
#                              Notes about Variable Names                                 #
###########################################################################################

# For all variables, the relevant network is denoted by "lab" for "labor" and "env" for "environment". 
# Objects ending in "_net" are saved in network format

# Dependent networks are named as follows:
    # 1. e.g. "lab_cos_sim5_1_net" 
          # "lab" for labor provisions, "cos" for cosine similarity,  "sim5" for similarity calculated on 5 grams, "_1" for weakest ties where cosine sim. is >=0.25 (Note: a 2 would be for medium strength ties, and a 3 for strongest ties, see paper for description) 
    # 2. e.g. "env_extjac_sim3_2_net"
          # "env" for environmental provisions, "extjac" for extended jaccard similarity, "sim5" for similarity calculated on 3 grams, "_2" for medium strength ties where ext.jac. similarity is >=0.50.

# Other variables are:
# no_common_member = No Common Member
# common_member_WB_high = Common High-Income Member 
# common_member_WB_other = Common Low-Income Member
# HInc = High-Income Member 
# ciri_worker_of_max_rgdpe_cap_reverse = Ciri Worker Index (reversed) of member state with maximum real GDP per capita (a measure of NTI Performance (Max GDP/CAP))
# ciri_worker_of_max_rgdpo_reverse = Ciri Worker Index (reversed) of member state with maximum real GDP (a measure of NTI Performance (Max GDP))
# ciri_physint_of_max_rgdpe_cap_reverse = Ciri Physical Integrity Rights Index (reversed) of member state with maximum real GDP per capita (a measure of NTI Performance (Max GDP/CAP))
# ciri_physint_of_max_rgdpo_reverse = Ciri Physical Integrity Rights Index (reversed) of member state with maximum real GDP (a measure of NTI Performance (Max GDP))
# top20GDP_PTAmember_sample_years_lab = Top 20 GDP Member 
# RegionCon = Region code 
# fulltext... = Full-Text Tie
# time_count = Years Apart
# bilateral = Bilateral
# EPI... = Environmental Performance Index (a measure of NTI Performance)
# greenhouse_of... = five-year moving average of the annual percent change in greenhouse gas emissions (a measure of NTI Performance)

# Other Notes:
# What is labelled "EC" in the data files is relabelled as "EUU" in the paper/Appendix

###########################################################################################
#                                Prepare the Data                                         #
###########################################################################################

# Save dyad-level variables that are in network format as matrices to work in lolog package 
# Labor
common_member_WB_high_lab<-  as.sociomatrix(common_member_WB_high_lab_net)
common_member_WB_other_lab<-  as.sociomatrix(common_member_WB_other_lab_net)
common_member_lab <- as.sociomatrix(common_member_lab_net) 
fulltext_cos_sim5_1_lab <- as.sociomatrix(fulltext_cos_sim5_1_lab_net) 
fulltext_cos_sim5_2_lab <- as.sociomatrix(fulltext_cos_sim5_2_lab_net) 
fulltext_cos_sim5_3_lab <- as.sociomatrix(fulltext_cos_sim5_3_lab_net) 
fulltext_cos_sim3_1_lab <- as.sociomatrix(fulltext_cos_sim5_1_lab_net) 
fulltext_cos_sim3_2_lab <- as.sociomatrix(fulltext_cos_sim5_2_lab_net) 
fulltext_cos_sim3_3_lab <- as.sociomatrix(fulltext_cos_sim5_3_lab_net) 
fulltext_extjac_sim5_1_lab <- as.sociomatrix(fulltext_extjac_sim5_1_lab_net) 
fulltext_extjac_sim5_2_lab <- as.sociomatrix(fulltext_extjac_sim5_2_lab_net) 
fulltext_extjac_sim5_3_lab <- as.sociomatrix(fulltext_extjac_sim5_3_lab_net) 
fulltext_extjac_sim3_1_lab <- as.sociomatrix(fulltext_extjac_sim5_1_lab_net) 
fulltext_extjac_sim3_2_lab <- as.sociomatrix(fulltext_extjac_sim5_2_lab_net) 
fulltext_extjac_sim3_3_lab <- as.sociomatrix(fulltext_extjac_sim5_3_lab_net) 
# Environment 
common_member_WB_high_env<-  as.sociomatrix(common_member_WB_high_env_net)
common_member_WB_other_env<-  as.sociomatrix(common_member_WB_other_env_net)
common_member_env <- as.sociomatrix(common_member_env_net) 
fulltext_cos_sim5_1_env <- as.sociomatrix(fulltext_cos_sim5_1_env_net) 
fulltext_cos_sim5_2_env <- as.sociomatrix(fulltext_cos_sim5_2_env_net) 
fulltext_cos_sim5_3_env <- as.sociomatrix(fulltext_cos_sim5_3_env_net) 
fulltext_cos_sim3_1_env <- as.sociomatrix(fulltext_cos_sim5_1_env_net) 
fulltext_cos_sim3_2_env <- as.sociomatrix(fulltext_cos_sim5_2_env_net) 
fulltext_cos_sim3_3_env <- as.sociomatrix(fulltext_cos_sim5_3_env_net) 
fulltext_extjac_sim5_1_env <- as.sociomatrix(fulltext_extjac_sim5_1_env_net) 
fulltext_extjac_sim5_2_env <- as.sociomatrix(fulltext_extjac_sim5_2_env_net) 
fulltext_extjac_sim5_3_env <- as.sociomatrix(fulltext_extjac_sim5_3_env_net) 
fulltext_extjac_sim3_1_env <- as.sociomatrix(fulltext_extjac_sim5_1_env_net) 
fulltext_extjac_sim3_2_env <- as.sociomatrix(fulltext_extjac_sim5_2_env_net) 
fulltext_extjac_sim3_3_env <- as.sociomatrix(fulltext_extjac_sim5_3_env_net) 


# Flip scale of ciri_worker & ciri_physint variables 
ciri_worker_of_max_rgdpe_cap_reverse <- lab_cos_sim5_2_net %v% "ciri_worker_of_max_rgdpe_cap" 
ciri_worker_of_max_rgdpe_cap_reverse <- abs(ciri_worker_of_max_rgdpe_cap_reverse-2)
ciri_worker_of_max_rgdpo_reverse <- lab_cos_sim5_2_net %v% "ciri_worker_of_max_rgdpo" 
ciri_worker_of_max_rgdpo_reverse <- abs(ciri_worker_of_max_rgdpo_reverse-2)
ciri_physint_of_max_rgdpe_cap_reverse <- lab_cos_sim5_2_net %v% "ciri_physint_of_max_rgdpe_cap" 
ciri_physint_of_max_rgdpe_cap_reverse <- abs(ciri_physint_of_max_rgdpe_cap_reverse-8)
# Resave flipped versions as vertex attributes in all labor networks
lab_cos_sim5_3_net%v%"ciri_worker_of_max_rgdpe_cap_reverse" <-ciri_worker_of_max_rgdpe_cap_reverse
lab_cos_sim5_2_net%v%"ciri_worker_of_max_rgdpe_cap_reverse" <-ciri_worker_of_max_rgdpe_cap_reverse 
lab_cos_sim5_1_net%v%"ciri_worker_of_max_rgdpe_cap_reverse" <-ciri_worker_of_max_rgdpe_cap_reverse 
lab_cos_sim3_3_net%v%"ciri_worker_of_max_rgdpe_cap_reverse" <-ciri_worker_of_max_rgdpe_cap_reverse
lab_cos_sim3_2_net%v%"ciri_worker_of_max_rgdpe_cap_reverse" <-ciri_worker_of_max_rgdpe_cap_reverse 
lab_cos_sim3_1_net%v%"ciri_worker_of_max_rgdpe_cap_reverse" <-ciri_worker_of_max_rgdpe_cap_reverse 
lab_cos_sim5_3_net%v%"ciri_worker_of_max_rgdpo_reverse" <-ciri_worker_of_max_rgdpo_reverse
lab_cos_sim5_2_net%v%"ciri_worker_of_max_rgdpo_reverse" <-ciri_worker_of_max_rgdpo_reverse 
lab_cos_sim5_1_net%v%"ciri_worker_of_max_rgdpo_reverse" <-ciri_worker_of_max_rgdpo_reverse 
lab_cos_sim3_3_net%v%"ciri_worker_of_max_rgdpo_reverse" <-ciri_worker_of_max_rgdpo_reverse
lab_cos_sim3_2_net%v%"ciri_worker_of_max_rgdpo_reverse" <-ciri_worker_of_max_rgdpo_reverse 
lab_cos_sim3_1_net%v%"ciri_worker_of_max_rgdpo_reverse" <-ciri_worker_of_max_rgdpo_reverse 
lab_cos_sim5_3_net%v%"ciri_physint_of_max_rgdpe_cap_reverse" <-ciri_physint_of_max_rgdpe_cap_reverse
lab_cos_sim5_2_net%v%"ciri_physint_of_max_rgdpe_cap_reverse" <-ciri_physint_of_max_rgdpe_cap_reverse 
lab_cos_sim5_1_net%v%"ciri_physint_of_max_rgdpe_cap_reverse" <-ciri_physint_of_max_rgdpe_cap_reverse 
lab_cos_sim3_3_net%v%"ciri_physint_of_max_rgdpe_cap_reverse" <-ciri_physint_of_max_rgdpe_cap_reverse
lab_cos_sim3_2_net%v%"ciri_physint_of_max_rgdpe_cap_reverse" <-ciri_physint_of_max_rgdpe_cap_reverse 
lab_cos_sim3_1_net%v%"ciri_physint_of_max_rgdpe_cap_reverse" <-ciri_physint_of_max_rgdpe_cap_reverse 
lab_extjac_sim5_3_net%v%"ciri_worker_of_max_rgdpe_cap_reverse" <-ciri_worker_of_max_rgdpe_cap_reverse
lab_extjac_sim5_2_net%v%"ciri_worker_of_max_rgdpe_cap_reverse" <-ciri_worker_of_max_rgdpe_cap_reverse 
lab_extjac_sim5_1_net%v%"ciri_worker_of_max_rgdpe_cap_reverse" <-ciri_worker_of_max_rgdpe_cap_reverse 
lab_extjac_sim3_3_net%v%"ciri_worker_of_max_rgdpe_cap_reverse" <-ciri_worker_of_max_rgdpe_cap_reverse
lab_extjac_sim3_2_net%v%"ciri_worker_of_max_rgdpe_cap_reverse" <-ciri_worker_of_max_rgdpe_cap_reverse 
lab_extjac_sim3_1_net%v%"ciri_worker_of_max_rgdpe_cap_reverse" <-ciri_worker_of_max_rgdpe_cap_reverse 
lab_extjac_sim5_3_net%v%"ciri_worker_of_max_rgdpo_reverse" <-ciri_worker_of_max_rgdpo_reverse
lab_extjac_sim5_2_net%v%"ciri_worker_of_max_rgdpo_reverse" <-ciri_worker_of_max_rgdpo_reverse 
lab_extjac_sim5_1_net%v%"ciri_worker_of_max_rgdpo_reverse" <-ciri_worker_of_max_rgdpo_reverse 
lab_extjac_sim3_3_net%v%"ciri_worker_of_max_rgdpo_reverse" <-ciri_worker_of_max_rgdpo_reverse
lab_extjac_sim3_2_net%v%"ciri_worker_of_max_rgdpo_reverse" <-ciri_worker_of_max_rgdpo_reverse 
lab_extjac_sim3_1_net%v%"ciri_worker_of_max_rgdpo_reverse" <-ciri_worker_of_max_rgdpo_reverse 

# Make a variable for not having a common member
# Labor
no_common_member_lab <- common_member_lab
no_common_member_lab <- ifelse(no_common_member_lab==0,1,0)
# Environment 
no_common_member_env <- common_member_env
no_common_member_env <- ifelse(no_common_member_env==0,1,0)

# Make a bilateral/not dummy variable 
# Labor
members <- lab_cos_sim5_2_net %v% "members_count" # Note: doesn't matter which net "members_count" is taken from as it does noto change from one net to another
bilateral <- ifelse(members==2,1,0)
# Save as a vertex attribute
lab_cos_sim5_1_net%v%"bilateral" <- bilateral
lab_cos_sim5_2_net%v%"bilateral" <- bilateral
lab_cos_sim5_3_net%v%"bilateral" <- bilateral
lab_cos_sim3_1_net%v%"bilateral" <- bilateral
lab_cos_sim3_2_net%v%"bilateral" <- bilateral
lab_cos_sim3_3_net%v%"bilateral" <- bilateral
members <- lab_extjac_sim5_2_net %v% "members_count" 
bilateral <- ifelse(members==2,1,0)
lab_extjac_sim5_1_net%v%"bilateral" <- bilateral
lab_extjac_sim5_2_net%v%"bilateral" <- bilateral
lab_extjac_sim5_3_net%v%"bilateral" <- bilateral
lab_extjac_sim3_1_net%v%"bilateral" <- bilateral
lab_extjac_sim3_2_net%v%"bilateral" <- bilateral
lab_extjac_sim3_3_net%v%"bilateral" <- bilateral
# Environment
members <- env_cos_sim5_2_net %v% "members_count" 
bilateral <- ifelse(members==2,1,0)
# Save as a vertex attribute
env_cos_sim5_1_net%v%"bilateral" <- bilateral
env_cos_sim5_2_net%v%"bilateral" <- bilateral
env_cos_sim5_3_net%v%"bilateral" <- bilateral
env_cos_sim3_1_net%v%"bilateral" <- bilateral
env_cos_sim3_2_net%v%"bilateral" <- bilateral
env_cos_sim3_3_net%v%"bilateral" <- bilateral
members <- env_extjac_sim5_2_net %v% "members_count" 
bilateral <- ifelse(members==2,1,0)
env_extjac_sim5_1_net%v%"bilateral" <- bilateral
env_extjac_sim5_2_net%v%"bilateral" <- bilateral
env_extjac_sim5_3_net%v%"bilateral" <- bilateral
env_extjac_sim3_1_net%v%"bilateral" <- bilateral
env_extjac_sim3_2_net%v%"bilateral" <- bilateral
env_extjac_sim3_3_net%v%"bilateral" <- bilateral

# Change categorical & count variable formats to work with lolog package 
# Labor
lab_cos_sim5_1_net%v%"time_count_lab" <- as.numeric(lab_cos_sim5_1_net %v% "time_count")
lab_cos_sim5_2_net%v%"time_count_lab" <- as.numeric(lab_cos_sim5_2_net %v% "time_count")
lab_cos_sim5_3_net%v%"time_count_lab" <- as.numeric(lab_cos_sim5_3_net %v% "time_count")
lab_cos_sim5_1_net%v%"RegionCon" <- as.character(lab_cos_sim5_1_net %v% "RegionCon")
lab_cos_sim5_2_net%v%"RegionCon" <- as.character(lab_cos_sim5_2_net %v% "RegionCon")
lab_cos_sim5_3_net%v%"RegionCon" <- as.character(lab_cos_sim5_3_net %v% "RegionCon")
lab_cos_sim5_1_net%v%"HInc" <- as.character(lab_cos_sim5_1_net %v% "HInc")
lab_cos_sim5_2_net%v%"HInc" <- as.character(lab_cos_sim5_2_net %v% "HInc")
lab_cos_sim5_3_net%v%"HInc" <- as.character(lab_cos_sim5_3_net %v% "HInc")
lab_cos_sim5_1_net%v%"top20GDP_PTAmember_sample_years_lab" <- as.character(lab_cos_sim5_1_net %v% "top20GDP_PTAmember_sample_years_lab")
lab_cos_sim5_2_net%v%"top20GDP_PTAmember_sample_years_lab" <- as.character(lab_cos_sim5_2_net %v% "top20GDP_PTAmember_sample_years_lab")
lab_cos_sim5_3_net%v%"top20GDP_PTAmember_sample_years_lab" <- as.character(lab_cos_sim5_3_net %v% "top20GDP_PTAmember_sample_years_lab")
time_count_lab <- lab_cos_sim5_2_net %v% "time_count_lab" # Note: doesn't matter which level, as time_count doesnt change between levels
lab_cos_sim3_1_net%v%"time_count_lab" <- as.numeric(lab_cos_sim3_1_net %v% "time_count")
lab_cos_sim3_3_net%v%"time_count_lab" <- as.numeric(lab_cos_sim3_3_net %v% "time_count")
lab_cos_sim3_2_net%v%"time_count_lab" <- as.numeric(lab_cos_sim3_2_net %v% "time_count")
lab_cos_sim3_1_net%v%"HInc" <- as.character(lab_cos_sim3_1_net %v% "HInc")
lab_cos_sim3_2_net%v%"HInc" <- as.character(lab_cos_sim3_2_net %v% "HInc")
lab_cos_sim3_3_net%v%"HInc" <- as.character(lab_cos_sim3_3_net %v% "HInc")
lab_cos_sim3_1_net%v%"RegionCon" <- as.character(lab_cos_sim3_1_net %v% "RegionCon")
lab_cos_sim3_2_net%v%"RegionCon" <- as.character(lab_cos_sim3_2_net %v% "RegionCon")
lab_cos_sim3_3_net%v%"RegionCon" <- as.character(lab_cos_sim3_3_net %v% "RegionCon")
lab_cos_sim3_1_net%v%"top20GDP_PTAmember_sample_years_lab" <- as.character(lab_cos_sim3_1_net %v% "top20GDP_PTAmember_sample_years_lab")
lab_cos_sim3_2_net%v%"top20GDP_PTAmember_sample_years_lab" <- as.character(lab_cos_sim3_2_net %v% "top20GDP_PTAmember_sample_years_lab")
lab_cos_sim3_3_net%v%"top20GDP_PTAmember_sample_years_lab" <- as.character(lab_cos_sim3_3_net %v% "top20GDP_PTAmember_sample_years_lab")
lab_extjac_sim5_1_net%v%"time_count_lab" <- as.numeric(lab_extjac_sim5_1_net %v% "time_count")
lab_extjac_sim5_2_net%v%"time_count_lab" <- as.numeric(lab_extjac_sim5_2_net %v% "time_count")
lab_extjac_sim5_3_net%v%"time_count_lab" <- as.numeric(lab_extjac_sim5_3_net %v% "time_count")
lab_extjac_sim5_1_net%v%"RegionCon" <- as.character(lab_extjac_sim5_1_net %v% "RegionCon")
lab_extjac_sim5_2_net%v%"RegionCon" <- as.character(lab_extjac_sim5_2_net %v% "RegionCon")
lab_extjac_sim5_3_net%v%"RegionCon" <- as.character(lab_extjac_sim5_3_net %v% "RegionCon")
lab_extjac_sim5_1_net%v%"HInc" <- as.character(lab_extjac_sim5_1_net %v% "HInc")
lab_extjac_sim5_2_net%v%"HInc" <- as.character(lab_extjac_sim5_2_net %v% "HInc")
lab_extjac_sim5_3_net%v%"HInc" <- as.character(lab_extjac_sim5_3_net %v% "HInc")
# Environment
env_cos_sim5_1_net%v%"time_count_env" <- as.numeric(env_cos_sim5_1_net %v% "time_count")
env_cos_sim5_2_net%v%"time_count_env" <- as.numeric(env_cos_sim5_2_net %v% "time_count")
env_cos_sim5_3_net%v%"time_count_env" <- as.numeric(env_cos_sim5_3_net %v% "time_count")
env_cos_sim5_1_net%v%"RegionCon" <- as.character(env_cos_sim5_1_net %v% "RegionCon")
env_cos_sim5_2_net%v%"RegionCon" <- as.character(env_cos_sim5_2_net %v% "RegionCon")
env_cos_sim5_3_net%v%"RegionCon" <- as.character(env_cos_sim5_3_net %v% "RegionCon")
env_cos_sim5_1_net%v%"HInc" <- as.character(env_cos_sim5_1_net %v% "HInc")
env_cos_sim5_2_net%v%"HInc" <- as.character(env_cos_sim5_2_net %v% "HInc")
env_cos_sim5_3_net%v%"HInc" <- as.character(env_cos_sim5_3_net %v% "HInc")
env_cos_sim3_1_net%v%"time_count_env" <- as.numeric(env_cos_sim3_1_net %v% "time_count")
env_cos_sim3_2_net%v%"time_count_env" <- as.numeric(env_cos_sim3_2_net %v% "time_count")
env_cos_sim3_3_net%v%"time_count_env" <- as.numeric(env_cos_sim3_3_net %v% "time_count")
env_cos_sim3_1_net%v%"RegionCon" <- as.character(env_cos_sim3_1_net %v% "RegionCon")
env_cos_sim3_2_net%v%"RegionCon" <- as.character(env_cos_sim3_2_net %v% "RegionCon")
env_cos_sim3_3_net%v%"RegionCon" <- as.character(env_cos_sim3_3_net %v% "RegionCon")
env_cos_sim3_1_net%v%"HInc" <- as.character(env_cos_sim3_1_net %v% "HInc")
env_cos_sim3_2_net%v%"HInc" <- as.character(env_cos_sim3_2_net %v% "HInc")
env_cos_sim3_3_net%v%"HInc" <- as.character(env_cos_sim3_3_net %v% "HInc")
env_cos_sim5_1_net%v%"top20GDP_PTAmember_sample_years_env" <- as.character(env_cos_sim5_1_net %v% "top20GDP_PTAmember_sample_years_env")
env_cos_sim5_2_net%v%"top20GDP_PTAmember_sample_years_env" <- as.character(env_cos_sim5_2_net %v% "top20GDP_PTAmember_sample_years_env")
env_cos_sim5_3_net%v%"top20GDP_PTAmember_sample_years_env" <- as.character(env_cos_sim5_3_net %v% "top20GDP_PTAmember_sample_years_env")
time_count_env <- env_cos_sim5_2_net %v% "time_count_env" # Note: doesn't matter which level, as time_count doesnt change between levels
env_extjac_sim5_1_net%v%"time_count_env" <- as.numeric(env_extjac_sim5_1_net %v% "time_count")
env_extjac_sim5_2_net%v%"time_count_env" <- as.numeric(env_extjac_sim5_2_net %v% "time_count")
env_extjac_sim5_3_net%v%"time_count_env" <- as.numeric(env_extjac_sim5_3_net %v% "time_count")
env_extjac_sim5_1_net%v%"RegionCon" <- as.character(env_extjac_sim5_1_net %v% "RegionCon")
env_extjac_sim5_2_net%v%"RegionCon" <- as.character(env_extjac_sim5_2_net %v% "RegionCon")
env_extjac_sim5_3_net%v%"RegionCon" <- as.character(env_extjac_sim5_3_net %v% "RegionCon")
env_extjac_sim5_1_net%v%"HInc" <- as.character(env_extjac_sim5_1_net %v% "HInc")
env_extjac_sim5_2_net%v%"HInc" <- as.character(env_extjac_sim5_2_net %v% "HInc")
env_extjac_sim5_3_net%v%"HInc" <- as.character(env_extjac_sim5_3_net %v% "HInc")

###########################################################################################
#                                       Table 1                                           #
###########################################################################################

# Save each provision's text as an object:
CAFTA_DR_lab <- "Each Party shall strive
to ensure that such labor principles and the internationally
recognized labor rights set forth in Article 16.8 are recognized
and protected by its law."

USA_JOR_lab <- "The Parties shall strive to ensure that such
labor principles and the internationally recognized labor rights
set forth in paragraph 6 are recognized and protected by domestic law."

# Save CAFTA_DR*2 and USA_JOR*2
CAFTA_DR_lab_double <- paste(CAFTA_DR_lab, CAFTA_DR_lab )
CAFTA_DR_USA_JOR_lab <- paste(CAFTA_DR_lab, USA_JOR_lab )

# Make these four texts into a corpus
PTA_lab_Corpus <- corpus(c(CAFTA_DR_lab, USA_JOR_lab, CAFTA_DR_lab_double, CAFTA_DR_USA_JOR_lab))

# Make "tokens" for each n-gram and remove punctuation, numbers, symbols, etc., make lowercase
toks <- tokens(PTA_lab_Corpus,
               remove_punct = TRUE,
               remove_twitter=TRUE, # twitter means @ & #
               remove_numbers = TRUE,
               remove_symbols = TRUE,
               ngrams=1) 
toks <- tokens_tolower(toks) # makes lowercase

# Now set up as 5 grams tokenized
toks5 <- tokens_ngrams(toks, n=5)

# Make the 5-gram document-feature matrix for the PTA labor provision example
PTA_eg_dfm1 <- dfm(toks5, 
                  stem = TRUE)  

# Calculate similarity scores on the 5-gram document-feature matrix 
cos_sim <- as.matrix(quanteda::textstat_simil(PTA_eg_dfm1,
                                              margin = "documents",
                                              method = "cosine"))

# Table 4 Results (in the paper for readability only entries below the diagonal are shown as above is a mirror image)
cos_sim

# Results: 
#          text1     text2     text3     text4
# text1 1.0000000 0.6521739 0.9789450 0.8858796
# text2 0.6521739 1.0000000 0.6384424 0.8858796
# text3 0.9789450 0.6384424 1.0000000 0.8672274
# text4 0.8858796 0.8858796 0.8672274 1.0000000


###########################################################################################
#                                  Plot Figures 1 to 3                                       #
###########################################################################################

# Note: the PTA similarity network plots for Figures 1-3 are made in igraph. 
# The position of nodes (PTAs) in the plots will vary each time the script is run but will always show the same nodes and ties. 

# Figure 1

# Convert "lab_cos_sim5_2_net" into a matrix
lab_cos_sim5_2 <- as.sociomatrix(lab_cos_sim5_2_net)

# Import "lab_cos_sim5_2" into igraph for plotting
glab_cos_sim5_binary<- graph.adjacency(lab_cos_sim5_2, mode="undirected", diag=FALSE) # from igraph

# Remove isolates (PTAs with no ties) for readability
glab_cos_sim5_binary <- delete.vertices(glab_cos_sim5_binary, V(glab_cos_sim5_binary)[ degree(glab_cos_sim5_binary)==0 ])

# Isolate components and save as a "groups" object (this will coerce data to work as if it were a community object, which igraph can use for plotting clusters)
components_list <- groups(components(glab_cos_sim5_binary))

# Set plot margins
par(mfrow=c(1,1), mar=c(2,2,2,2)) # "mfrow" sets 1 row, 1 column; "mar" sets margins

# Set file path, name, and size for the plot (defaults to save in working directory)
pdf("Figure_1.pdf", width=11.69, height=8.27)

# Plot, Changing colors of nodes, polygons, and polygon borders
plot(glab_cos_sim5_binary,
     layout= layout.reingold.tilford(glab_cos_sim5_binary,circular=T),
     vertex.frame.color=NA,
     mark.groups = components_list ,
     mark.border="darkgrey", # color of component outlines 
     mark.col=NA,
     vertex.color=NA,
     vertex.label.family="ArialMT", 
     vertex.label.font=c(1), # 1 plain, 2 bold, 3, italic, 4 bold italic, 5 symbol 
     vertex.label.cex=0.40,
     vertex.label.color="black",
     col=NA, # colour of nodes
     edge.color="lightgrey",
     vertex.size=5)

# Add labels for important components - this will vary depending on how the algorithm plots the network and needs to be manually adjusted
# Uncomment next line as an example 
# text(c(-1.35, 0, 1.2), c(0.65, -1.2, -0.1), c("EFTA/Eastern Europe", "EU", "US"))
    # first argument is "X" values, 2nd is "Y" values, third is label
dev.off()


# Figure 2

# Convert "env_cos_sim5_2_net" into a matrix
env_cos_sim5_2 <- as.sociomatrix(env_cos_sim5_2_net)

# Import "env_cos_sim5_2" into igraph for plotting
genv_cos_sim5_binary<- graph.adjacency(env_cos_sim5_2, mode="undirected", diag=FALSE) # from igraph

# Remove isolates (PTAs with no ties) for readability
genv_cos_sim5_binary <- delete.vertices(genv_cos_sim5_binary, V(genv_cos_sim5_binary)[ degree(genv_cos_sim5_binary)==0 ])

# Isolate components and save as a "groups" object (this will coerce data to work as if it were a community object, which igraph can use for plotting clusters)
components_list <- groups(components(genv_cos_sim5_binary))

# Set plot margins
par(mfrow=c(1,1), mar=c(2,2,2,2)) # "mfrow" sets 1 row, 1 column; "mar" sets margins

# Set file path, name, and size for the plot (defaults to save in working directory)
pdf("Figure_2.pdf", width=11.69, height=8.27)

# Plot, Changing colors of nodes, polygons, and polygon borders
plot(genv_cos_sim5_binary,
     layout= layout.reingold.tilford(genv_cos_sim5_binary,circular=T),
     vertex.frame.color=NA,
     mark.groups = components_list ,
     mark.border="darkgrey", # color of component outlines 
     mark.col=NA,
     vertex.color=NA,
     vertex.label.family="ArialMT", 
     vertex.label.font=c(1), # 1 plain, 2 bold, 3, italic, 4 bold italic, 5 symbol 
     vertex.label.cex=0.40,
     vertex.label.color="black",
     col=NA, # colour of nodes
     edge.color="lightgrey",
     vertex.size=5)

# Add labels for important components - this will vary depending on how the algorithm plots the network and needs to be manually adjusted
# text(c(-0.85, -1.4, 0, 1.3, -1.2), c(0.9, -0.2, -1.2, 0, 0.4), c("Canada/Taiwan, etc.", "EFTA", "EU", "US", "Japan/Jordan"))
  # first argument is "X" values, 2nd is "Y" values, third is label
dev.off()

# Figure 3

# Import "fulltext_cos_sim5_2" into igraph for plotting
fulltext_cos_sim5_2 <- as.matrix(fulltext_cos_sim5_2)
gfulltext_cos_sim5_binary<- graph.adjacency(fulltext_cos_sim5_2, mode="undirected", diag=FALSE) # from igraph

# Remove isolates (PTAs with no ties) for readability
gfulltext_cos_sim5_binary <- delete.vertices(gfulltext_cos_sim5_binary, V(gfulltext_cos_sim5_binary)[ degree(gfulltext_cos_sim5_binary)==0 ])

# Isolate components and save as a "groups" object (this will coerce data to work as if it were a community object, which igraph can use for plotting clusters)
components_list <- groups(components(gfulltext_cos_sim5_binary))

# Set plot margins
par(mfrow=c(1,1), mar=c(2,2,2,2)) # "mfrow" sets 1 row, 1 column; "mar" sets margins

# Set file path, name, and size for the plot (defaults to save in working directory)
pdf("Figure_3.pdf", width=11.69, height=8.27)

# Plot, Changing colors of nodes, polygons, and polygon borders
plot(gfulltext_cos_sim5_binary,
     layout= layout.reingold.tilford(gfulltext_cos_sim5_binary,circular=T),
     vertex.frame.color=NA,
     mark.groups = components_list ,
     mark.border="darkgrey", # color of component outlines 
     mark.col=NA,
     vertex.color=NA,
     vertex.label.family="ArialMT", 
     vertex.label.font=c(1), # 1 plain, 2 bold, 3, italic, 4 bold italic, 5 symbol 
     vertex.label.cex=0.40,
     vertex.label.color="black",
     col=NA, # colour of nodes
     edge.color="lightgrey",
     vertex.size=5)

# Add labels for important components - this will vary depending on how the algorithm plots the network and needs to be manually adjusted
# text(c(-1.25, 0, 1.1, 1), c(0, -0.9, -0.5, 0.5), c("Eastern Europe/Middle East", "EFTA", "EU", "US"))
# first argument is "X" values, 2nd is "Y" values, third is label
dev.off()


# Note: for added readability, in the version used in the paper, the line types used in these plots were edited in Illustrator. 
###########################################################################################
#                                       Table 2                                           #
###########################################################################################

# First save fulltext network as a network object prior to calculations
fulltext_cos_sim5_2_net<- network(fulltext_cos_sim5_2,matrix.type="adjacency",directed=FALSE) 

# Find out number of ties for column 1 in Table 1 (this is listed beside "total edges =" in the output)
lab_cos_sim5_2_net # 470
env_cos_sim5_2_net # 47
fulltext_cos_sim5_2_net # 1281

# Calculate Mean ties per PTA for column 2 in Table 1
# I.e. is "total edges"/"vertices"
lab_cos_sim5_2_net # 470/135 =  3.481481
env_cos_sim5_2_net # 47/107 = 0.4392523
fulltext_cos_sim5_2_net # 1281/348 = 3.681034

# Calculate Density Scores for column 3 in Table 1
# Load package "sna" (If necessary install first)
# As "sna" can interfere with other networks packages, we unload it directly after doing the calculations
# install.packages(sna) # script last checked using version 2.4
library(sna)
sna::gden(fulltext_cos_sim5_2_net, mode="graph") # = 0.02121634
sna::gden(lab_cos_sim5_2_net, mode="graph") # ==  0.05196241
sna::gden(env_cos_sim5_2_net, mode="graph") # ==  0.00828778
detach("package:sna", unload=TRUE)

# Find out Number of PTAs in each network for column 4 in Table 1 (this information is beside "vertices =")
lab_cos_sim5_2_net # 135
env_cos_sim5_2_net # 107
fulltext_cos_sim5_2_net # 348

###########################################################################################
#                                Notes on Lolog Model Terms                               #
###########################################################################################

# nodeMatch = do the vertex attributes (vertex level variables) match between two nodes/vertices? i.e. are they coded the same?
# absDiff = the absolute difference between two nodes'/vertices' attributes 
# nodeCov = a vertex attribute/covariate coded on the level of the node/vertex (i.e. here a PTA-level attribute)
# edgeCov = a covariate/attribute coded on the dyad/edge/tie-level (i.e. here a PTA-dyad-level attribute)
# gwdegree(X) = Degree Popularity term (geometrically weighted) where X is a decay term 
# edges = akin to an intercept
# | = any term following "|" denotes the order in which the ties are formed.


###########################################################################################
#                                Main Models/Table 3                                      #
###########################################################################################

# * Important Note: Models are simulation based and coefficients will vary slightly from one run to the next *

# Model 1: Labor
Model_1 <- lolog(lab_cos_sim5_2_net~
                   gwdegree(1)+
                   edgeCov(no_common_member_lab, "no_common_member_lab") +
                   nodeMatch("HInc") +
                   nodeCov("ciri_worker_of_max_rgdpe_cap_reverse") +
                   edgeCov(fulltext_cos_sim5_2_lab, "fulltext_cos_sim5_2_lab") +
                   nodeMatch("RegionCon") +
                   nodeCov("bilateral") +
                   absDiff("time_count_lab") + edges  | 
                   time_count_lab, maxIter=200, nsamp=3000, verbose=TRUE )
summary(Model_1)


# Model 2: Environment
Model_2 <- lolog(env_cos_sim5_2_net~ 
                   gwdegree(1) +
                   edgeCov(no_common_member_env, "no_common_member_env") +
                   nodeMatch("HInc") +
                   nodeCov("greenhouse_of_max_rgdpe_cap") +
                   edgeCov(fulltext_cos_sim5_2_env, "fulltext_cos_sim5_2_env") +
                   nodeMatch("RegionCon") +
                   nodeCov("bilateral") +
                   absDiff("time_count_env") + 
                   edges  |  
                   time_count_env, maxIter=200, nsamp=3000, verbose=TRUE )
summary(Model_2)


# Model 3: Labor
Model_3 <- lolog(lab_cos_sim5_2_net~
                   gwdegree(1)+ 
                   edgeCov(no_common_member_lab, "no_common_member_lab") +
                   edgeCov(common_member_WB_high_lab, "common_member_WB_high_lab") +
                   nodeMatch("HInc") +
                   nodeCov("ciri_worker_of_max_rgdpe_cap_reverse") +
                   edgeCov(fulltext_cos_sim5_2_lab, "fulltext_cos_sim5_2_lab") +
                   nodeMatch("RegionCon") +
                   nodeCov("bilateral") +
                   absDiff("time_count_lab") + edges  | 
                   time_count_lab, maxIter=200, nsamp=3000, verbose=TRUE )
summary(Model_3)


#Model 4
Model_4 <- lolog(env_cos_sim5_2_net~
                   gwdegree(1) + 
                   edgeCov(no_common_member_env, "no_common_member_env") +
                   edgeCov(common_member_WB_high_env, "common_member_WB_high_env") +
                   nodeMatch("HInc") +
                   nodeCov("greenhouse_of_max_rgdpe_cap") + 
                   edgeCov(fulltext_cos_sim5_2_env, "fulltext_cos_sim5_2_env") +
                   nodeMatch("RegionCon") +
                   nodeCov("bilateral") +
                   absDiff("time_count_env") + edges  | 
                   time_count_env, maxIter=200, nsamp=3000, verbose=TRUE )
summary(Model_4)


###########################################################################################
#                                  Figure 4                                               #
###########################################################################################

# Make plots of regression coefficients for Figure 4
# First save model coefficients as data.frames
results_Model_1 <- as.data.frame(summary(Model_1))
results_Model_2 <- as.data.frame(summary(Model_2))
results_Model_3 <- as.data.frame(summary(Model_3))
results_Model_4 <- as.data.frame(summary(Model_4))

# Rename columns to be "term" (i.e. names of coefficients), "estimate" and "std.error" (as required by dotwhisker)
results_Model_1$observed_statistics <- NULL
results_Model_1$pvalue <- NULL
results_Model_2$observed_statistics <- NULL
results_Model_2$pvalue <- NULL
results_Model_3$observed_statistics <- NULL
results_Model_3$pvalue <- NULL
results_Model_4$observed_statistics <- NULL
results_Model_4$pvalue <- NULL
results_Model_1$term <- row.names(results_Model_1) 
results_Model_2$term <- row.names(results_Model_2) 
results_Model_3$term <- row.names(results_Model_3) 
results_Model_4$term <- row.names(results_Model_4) 
names(results_Model_1) <- c( "estimate", "std.error", "term") 
names(results_Model_2) <- c( "estimate", "std.error", "term") 
names(results_Model_3) <- c( "estimate", "std.error", "term") 
names(results_Model_4) <- c( "estimate", "std.error", "term") 

# Relabel the variables
results_Model_1$term <- c("Degree Popularity", "No Common Member", "High Income (Both)","NTI Performance (Max GDP/cap)", "Full Text Tie",  "Same Region", "Bilateral", "Years Apart", "Edges") 
results_Model_2$term <- c("Degree Popularity", "No Common Member", "High Income (Both)","NTI Performance (Max GDP/cap)", "Full Text Tie",  "Same Region", "Bilateral", "Years Apart", "Edges") 
results_Model_3$term <- c("Degree Popularity", "No Common Member", "Common High Income Member",  "High Income (Both)", "NTI Performance (Max GDP/cap)", "Full Text Tie",  "Same Region", "Bilateral", "Years Apart", "Edges") 
results_Model_4$term <- c("Degree Popularity", "No Common Member", "Common High Income Member",  "High Income (Both)", "NTI Performance (Max GDP/cap)", "Full Text Tie",  "Same Region", "Bilateral", "Years Apart", "Edges") 

# Reorder the rows:
results_Model_1 <- results_Model_1[c("gwdegree.1",  "edgeCov.no_common_member_lab",  "nodematch.HInc", "nodecov.ciri_worker_of_max_rgdpe_cap_reverse",
                                     "edgeCov.fulltext_cos_sim5_2_lab",   "nodematch.RegionCon" , "nodecov.bilateral", "absDiff.time_count_lab", "edges" ),]
results_Model_2 <- results_Model_2[c("gwdegree.1",  "edgeCov.no_common_member_env", "nodematch.HInc",  "nodecov.greenhouse_of_max_rgdpe_cap",
                                                 "edgeCov.fulltext_cos_sim5_2_env",   "nodematch.RegionCon" , "nodecov.bilateral", "absDiff.time_count_env", "edges" ),]
results_Model_3 <- results_Model_3[c("gwdegree.1",  "edgeCov.no_common_member_lab", "edgeCov.common_member_WB_high_lab" , "nodematch.HInc",  "nodecov.ciri_worker_of_max_rgdpe_cap_reverse",
                                     "edgeCov.fulltext_cos_sim5_2_lab",   "nodematch.RegionCon" , "nodecov.bilateral", "absDiff.time_count_lab", "edges" ),]
results_Model_4 <- results_Model_4[c("gwdegree.1",  "edgeCov.no_common_member_env", "edgeCov.common_member_WB_high_env" ,"nodematch.HInc",   "nodecov.greenhouse_of_max_rgdpe_cap",
                                                 "edgeCov.fulltext_cos_sim5_2_env",   "nodematch.RegionCon" , "nodecov.bilateral","absDiff.time_count_env", "edges" ),]

# Stack (& label) the dataframes so they can be plotted together
results_Model_1$model <- "Model 1"
results_Model_2$model <- "Model 2"
results_Model_3$model <- "Model 3"
results_Model_4$model <- "Model 4"

results_all <- rbind(results_Model_4, results_Model_3, results_Model_2, results_Model_1)


# plot results with groupings
# Create list of brackets for x-axis (order = label, first included predictor, last included predictor)
four_brackets <- list(c("Efficiency", "Degree Popularity", "No Common Member"), 
                      c("Both", "Common High Income Member", "Common High Income Member"),
                      c("Distribution", "High Income (Both)", "NTI Performance (Max GDP/cap)"),
                      c("Controls", "Full Text Tie", "Years Apart"))

# Set file name and destination for plot (it will save the plot in the working directory unless otherwise specified)
pdf(file = "Figure_4.pdf", width=10, height=5) 
{dwplot(results_all,  vline = geom_vline(xintercept = 0, colour = "grey60", linetype = 2)) + # plot line at zero _behind_ coefs) 
    theme_bw() + xlab("Coefficient Estimate") + ylab("") +
    theme(legend.title = element_blank()) +
    scale_x_continuous(breaks=c(-4, -3, -2, -1, 0, 1, 2, 3,4))+
    scale_colour_grey(start = .3, end = .7)}   %>% add_brackets(four_brackets)
dev.off()  

# Note: The order of the legend was edited in Illustrator in the version included in the paper to improve readability.


###########################################################################################
#                                  Figure 5                                               #
###########################################################################################

# Note: For improved readability in black and white, the colour of the red lines
# were edited in the version of these plots in the paper using Illustrator.

# Combine GOF plots to make Figure 5
# Run goodness of fit command 
gdeg_Model_1 <- gofit(Model_1 , lab_cos_sim5_2_net ~ degree(0:10)) 
gdeg_Model_2 <- gofit(Model_2 , env_cos_sim5_2_net ~ degree(0:10)) 
gdeg_Model_3 <- gofit(Model_3, lab_cos_sim5_2_net ~ degree(0:10)) 
gdeg_Model_4 <- gofit(Model_4, env_cos_sim5_2_net ~ degree(0:10)) 

# Set file name and destination for plot (it will save the plot in the working directory unless otherwise specified)
pdf(file = "Figure_5.pdf", width=11.69, height=9)
# Make plots
p1 <- plot(gdeg_Model_1)
p2 <- plot(gdeg_Model_2)
p3 <- plot(gdeg_Model_3)
p4 <- plot(gdeg_Model_4) 
# Combine plots
grid.arrange(arrangeGrob(p1, top="Model 1"), arrangeGrob(p2, top="Model 2"),
             arrangeGrob(p3, top="Model 3"), arrangeGrob(p4, top="Model 4") ,
             nrow = 2, ncol=2)
dev.off()

###########################################################################################
#                     Appendix A: Top 20 most similar agreements                          #
###########################################################################################

# Note: for PTAs with the same similarity score, the order of the PTAs listed
# in the appendix may vary from the order shown in the calculations here. 

# 1. PTA Labor Provisions
# Set-up the data
lab_cos_sim5 <- as.matrix(lab_cos_sim5)
diag(lab_cos_sim5) <- NA # set diagonal to NA as we are not interested in PTAs being similar with themselves
lab_cos_sim5[lower.tri(lab_cos_sim5)] <- NA # as the matrix is symmetric, we only need either below or above diagonal, rest can be NA
# Convert data into long format
top_lab5 <- as.data.frame(as.table(lab_cos_sim5))
# Order the data by highest score to lowest
top_lab5 <- top_lab5[order(-top_lab5$Freq),] 
# Cut back data to top 20 to get published version (but as there are 49 PTAs with sim. score of 1 for labor, let's look at them all)
top_lab5 <- top_lab5[1:49,] 
# View top 20 
top_lab5

# 2. PTA Environmental Provisions
# Set-up the data
env_cos_sim5 <- as.matrix(env_cos_sim5)
diag(env_cos_sim5) <- NA #  set diagonal to NA as we are not interested in PTAs being similar with themselves
env_cos_sim5[lower.tri(env_cos_sim5)] <- NA # as the matrix is symmetric, we only need either below or above diagonal, rest can be NA
# Order the data by highest score to lowest
top_env5 <- as.data.frame(as.table(env_cos_sim5))
top_env5 <- top_env5[order(-top_env5$Freq),] 
# Cut back data to top 20 
top_env5 <- top_env5[1:20,]
# View top 20 
top_env5

# 3. PTA Full text
# Set-up the data
fulltext_cos_sim5 <- as.matrix(fulltext_cos_sim5)
diag(fulltext_cos_sim5) <- NA #  set diagonal to NA as we are not interested in PTAs being similar with themselves
fulltext_cos_sim5[lower.tri(fulltext_cos_sim5)] <- NA # as the matrix is symmetric, we only need either below or above diagonal, rest can be NA
# Convert data into long format
top_fulltext5 <- as.data.frame(as.table(fulltext_cos_sim5))
# Order the data by highest score to lowest
top_fulltext5 <- top_fulltext5[order(-top_fulltext5$Freq),] 
# Cut back data to top 20 
top_fulltext5 <- top_fulltext5[1:20,]
# View top 20 
top_fulltext5

###########################################################################################
#                          Appendix C: Robustness Checks                                  #
###########################################################################################

# * Important Note: Models are simulation based and coefficients will vary slightly from one run to the next *

# Table C1 

# Model C1
Model_C1 <- lolog(lab_cos_sim5_1_net~
                                           gwdegree(1)+ 
                                           edgeCov(no_common_member_lab, "no_common_member_lab") +
                                           nodeMatch("HInc") +
                                           nodeCov("ciri_worker_of_max_rgdpe_cap_reverse") +
                                           edgeCov(fulltext_cos_sim5_1_lab, "fulltext_cos_sim5_1_lab") +
                                           nodeMatch("RegionCon") +
                                           nodeCov("bilateral") +
                                           absDiff("time_count_lab") + edges  |  
                                           time_count_lab, maxIter=200, nsamp=3000,
                                         verbose=TRUE )
summary(Model_C1)


# Model C2
Model_C2 <- lolog(env_cos_sim5_1_net~
                                           gwdegree(1) +
                                           edgeCov(no_common_member_env, "no_common_member_env") +
                                           nodeMatch("HInc") +
                                           nodeCov("greenhouse_of_max_rgdpe_cap") +
                                           edgeCov(fulltext_cos_sim5_1_env, "fulltext_cos_sim5_1_env") +
                                           nodeMatch("RegionCon") +
                                           nodeCov("bilateral") +
                                           absDiff("time_count_env") + 
                                           edges  |  
                                           time_count_env, maxIter=200, nsamp=3000,
                                         verbose=TRUE )
summary(Model_C2)


# Model C3
Model_C3 <- lolog(lab_cos_sim5_3_net~
                                           gwdegree(1)+ 
                                           edgeCov(no_common_member_lab, "no_common_member_lab") +
                                           nodeMatch("HInc") +
                                           nodeCov("ciri_worker_of_max_rgdpe_cap_reverse") +
                                           edgeCov(fulltext_cos_sim5_3_lab, "fulltext_cos_sim5_3_lab") +
                                           nodeMatch("RegionCon") +
                                           nodeCov("bilateral") +
                                           absDiff("time_count_lab") + edges  |  
                                           time_count_lab, maxIter=200, nsamp=3000,
                                         verbose=TRUE )
summary(Model_C3)


# Model C4
Model_C4 <- lolog(env_cos_sim5_3_net~
                                           gwdegree(1) +
                                           edgeCov(no_common_member_env, "no_common_member_env") +
                                           nodeMatch("HInc") +
                                           nodeCov("greenhouse_of_max_rgdpe_cap") +
                                           edgeCov(fulltext_cos_sim5_3_env, "fulltext_cos_sim5_3_env") +
                                           nodeMatch("RegionCon") +
                                           nodeCov("bilateral") +
                                           absDiff("time_count_env") + 
                                           edges  |  
                                           time_count_env, maxIter=200, nsamp=3000,
                                         verbose=TRUE )
summary(Model_C4)


# Model C5
Model_C5 <- lolog(lab_extjac_sim5_2_net~
                                              gwdegree(1)+ 
                                              edgeCov(no_common_member_lab, "no_common_member_lab") +
                                              nodeMatch("HInc") +
                                              nodeCov("ciri_worker_of_max_rgdpe_cap_reverse") +
                                              edgeCov(fulltext_extjac_sim5_2_lab, "fulltext_extjac_sim5_2_lab") +
                                              nodeMatch("RegionCon") +
                                              nodeCov("bilateral") +
                                              absDiff("time_count_lab") + edges  | 
                                              time_count_lab, maxIter=200, nsamp=3000,
                                            verbose=TRUE )
summary(Model_C5)



# Model C6
Model_C6 <- lolog(env_extjac_sim5_2_net~
                                              gwdegree(1) +
                                              edgeCov(no_common_member_env, "no_common_member_env") +
                                              nodeMatch("HInc") +
                                              nodeCov("greenhouse_of_max_rgdpe_cap") +
                                              edgeCov(fulltext_extjac_sim5_2_env, "fulltext_extjac_sim5_2_env") +
                                              nodeMatch("RegionCon") +
                                              nodeCov("bilateral") +
                                              absDiff("time_count_env") + 
                                              edges  |  
                                              time_count_env, maxIter=200, nsamp=3000,
                                            verbose=TRUE )
summary(Model_C6)


# Model C7
Model_C7 <- lolog(lab_cos_sim3_2_net~
                                           gwdegree(1)+ 
                                           edgeCov(no_common_member_lab, "no_common_member_lab") +
                                           nodeMatch("HInc") +
                                           nodeCov("ciri_worker_of_max_rgdpe_cap_reverse") +
                                           edgeCov(fulltext_cos_sim3_2_lab, "fulltext_cos_sim3_2_lab") +
                                           nodeMatch("RegionCon") +
                                           nodeCov("bilateral") +
                                           absDiff("time_count_lab") + edges  | 
                                           time_count_lab, maxIter=200, nsamp=3000,
                                         verbose=TRUE )
summary(Model_C7)


# Model C8
Model_C8 <- lolog(env_cos_sim3_2_net~
                                           gwdegree(1) +
                                           edgeCov(no_common_member_env, "no_common_member_env") +
                                           nodeMatch("HInc") +
                                           nodeCov("greenhouse_of_max_rgdpe_cap") +
                                           edgeCov(fulltext_cos_sim3_2_env, "fulltext_cos_sim3_2_env") +
                                           nodeMatch("RegionCon") +
                                           nodeCov("bilateral") +
                                           absDiff("time_count_env") + 
                                           edges  |  
                                           time_count_env, maxIter=200, nsamp=3000,
                                         verbose=TRUE )
summary(Model_C8)



# Table C2 

# Model C9
Model_C9 <- lolog(lab_cos_sim5_2_net~
                                                   gwdegree(0)+ 
                                                   edgeCov(no_common_member_lab, "no_common_member_lab") +
                                                   nodeMatch("HInc") +
                                                   nodeCov("ciri_worker_of_max_rgdpe_cap_reverse") +
                                                   edgeCov(fulltext_cos_sim5_2_lab, "fulltext_cos_sim5_2_lab") +
                                                   nodeMatch("RegionCon") +
                                                   nodeCov("bilateral") +
                                                   absDiff("time_count_lab") + edges  |    
                                                   time_count_lab, maxIter=200, nsamp=3000,
                                                 verbose=TRUE )
summary(Model_C9)

# Model C10
Model_C10 <- lolog(env_cos_sim5_2_net~
                                                    gwdegree(0) +
                                                    edgeCov(no_common_member_env, "no_common_member_env") +
                                                    nodeMatch("HInc") +
                                                    nodeCov("greenhouse_of_max_rgdpe_cap") +
                                                    edgeCov(fulltext_cos_sim5_2_env, "fulltext_cos_sim5_2_env") +
                                                    nodeMatch("RegionCon") +
                                                    nodeCov("bilateral") +
                                                    absDiff("time_count_env") +   
                                                    edges  |  
                                                    time_count_env, maxIter=200, nsamp=3000,
                                                  verbose=TRUE )
summary(Model_C10)


# C11
Model_C11 <- lolog(lab_cos_sim5_2_net~
                                                      gwdegree(0.5)+ 
                                                      edgeCov(no_common_member_lab, "no_common_member_lab") +
                                                      nodeMatch("HInc") +
                                                      nodeCov("ciri_worker_of_max_rgdpe_cap_reverse") +
                                                      edgeCov(fulltext_cos_sim5_2_lab, "fulltext_cos_sim5_2_lab") +
                                                      nodeMatch("RegionCon") +
                                                      nodeCov("bilateral") +
                                                      absDiff("time_count_lab") + edges  |    
                                                      time_count_lab, maxIter=200, nsamp=3000,
                                                    verbose=TRUE )
summary(Model_C11)

# Model C12
Model_C12 <- lolog(env_cos_sim5_2_net~
                                                      gwdegree(0.5) +
                                                      edgeCov(no_common_member_env, "no_common_member_env") +
                                                      nodeMatch("HInc") +
                                                      nodeCov("greenhouse_of_max_rgdpe_cap") +
                                                      edgeCov(fulltext_cos_sim5_2_env, "fulltext_cos_sim5_2_env") +
                                                      nodeMatch("RegionCon") +
                                                      nodeCov("bilateral") +
                                                      absDiff("time_count_env") +   
                                                      edges  |  
                                                      time_count_env, maxIter=200, nsamp=3000,
                                                    verbose=TRUE )
summary(Model_C12)



# Model C13
Model_C13 <- lolog(lab_cos_sim5_2_net~
                                            gwdegree(1)+ 
                                            edgeCov(no_common_member_lab, "no_common_member_lab") +
                                            nodeMatch("HInc") +
                                            nodeCov("ciri_worker_of_max_rgdpo_reverse") +
                                            edgeCov(fulltext_cos_sim5_2_lab, "fulltext_cos_sim5_2_lab") +
                                            nodeMatch("RegionCon") +
                                            nodeCov("bilateral") +
                                            absDiff("time_count_lab") + edges  |    
                                            time_count_lab, maxIter=200, nsamp=3000,
                                          verbose=TRUE )
summary(Model_C13)

# Model C14
Model_C14 <- lolog(env_cos_sim5_2_net~
                                            gwdegree(1) +
                                            edgeCov(no_common_member_env, "no_common_member_env") +
                                            nodeMatch("HInc") +
                                            nodeCov("greenhouse_of_max_rgdpo") +
                                            edgeCov(fulltext_cos_sim5_2_env, "fulltext_cos_sim5_2_env") +
                                            nodeMatch("RegionCon") +
                                            nodeCov("bilateral") +
                                            absDiff("time_count_env") +   
                                            edges  |  
                                            time_count_env, maxIter=200, nsamp=3000,
                                          verbose=TRUE )
summary(Model_C14)


# Model C15
Model_C15 <- lolog(lab_cos_sim5_2_net~
                                            gwdegree(1)+ 
                                            edgeCov(no_common_member_lab, "no_common_member_lab") +
                                            nodeMatch("HInc") +
                                            nodeCov("ciri_worker_of_max_rgdpe_cap_reverse") +
                                            edgeCov(fulltext_cos_sim5_2_lab, "fulltext_cos_sim5_2_lab") +
                                            nodeMatch("RegionCon") +
                                            nodeCov("members_count") +
                                            absDiff("time_count_lab") + edges  |    
                                            time_count_lab, maxIter=200, nsamp=3000,
                                          verbose=TRUE )
summary(Model_C15)

# Model C16
Model_C16 <- lolog(env_cos_sim5_2_net~
                                            gwdegree(1) +
                                            edgeCov(no_common_member_env, "no_common_member_env") +
                                            nodeMatch("HInc") +
                                            nodeCov("greenhouse_of_max_rgdpe_cap") +
                                            edgeCov(fulltext_cos_sim5_2_env, "fulltext_cos_sim5_2_env") +
                                            nodeMatch("RegionCon") +
                                            nodeCov("members_count") +
                                            absDiff("time_count_env") +   
                                            edges  |  
                                            time_count_env, maxIter=200, nsamp=3000,
                                          verbose=TRUE )
summary(Model_C16)


# Model C17
Model_C17 <- lolog(lab_cos_sim5_2_net~
                                            gwdegree(1)+ 
                                            edgeCov(no_common_member_lab, "no_common_member_lab") +
                                            nodeMatch("HInc") +
                                            nodeCov("ciri_physint_of_max_rgdpe_cap_reverse") +
                                            edgeCov(fulltext_cos_sim5_2_lab, "fulltext_cos_sim5_2_lab") +
                                            nodeMatch("RegionCon") +
                                            nodeCov("bilateral") +
                                            absDiff("time_count_lab") + edges  |    
                                            time_count_lab, maxIter=200, nsamp=3000,
                                          verbose=TRUE )
summary(Model_C17)

# Model C18
Model_C18 <- lolog(env_cos_sim5_2_net~
                                            gwdegree(1) +
                                            edgeCov(no_common_member_env, "no_common_member_env") +
                                            nodeMatch("HInc") +
                                            nodeCov("EPI_of_max_gdpcap") +
                                            edgeCov(fulltext_cos_sim5_2_env, "fulltext_cos_sim5_2_env") +
                                            nodeMatch("RegionCon") +
                                            nodeCov("bilateral") +
                                            absDiff("time_count_env") +   
                                            edges  |  
                                            time_count_env, maxIter=200, nsamp=3000,
                                          verbose=TRUE )
summary(Model_C18)



# Model C19
Model_C19 <- lolog(lab_cos_sim5_2_net~
                                            gwdegree(1)+ 
                                            edgeCov(no_common_member_lab, "no_common_member_lab") +
                                            nodeMatch("top20GDP_PTAmember_sample_years_lab") +
                                            nodeCov("ciri_worker_of_max_rgdpo_reverse") +
                                            edgeCov(fulltext_cos_sim5_2_lab, "fulltext_cos_sim5_2_lab") +
                                            nodeMatch("RegionCon") +
                                            nodeCov("bilateral") +
                                            absDiff("time_count_lab") +
                                            edges  |    
                                            time_count_lab, maxIter=200, nsamp=3000,
                                          verbose=TRUE )
summary(Model_C19)

# Model C20
Model_C20 <- lolog(env_cos_sim5_2_net~
                                            gwdegree(1) +
                                            edgeCov(no_common_member_env, "no_common_member_env") +
                                            nodeMatch("top20GDP_PTAmember_sample_years_env") +
                                            nodeCov("greenhouse_of_max_rgdpo") +
                                            edgeCov(fulltext_cos_sim5_2_env, "fulltext_cos_sim5_2_env") +
                                            nodeMatch("RegionCon") +
                                            nodeCov("bilateral") +
                                            absDiff("time_count_env") +   
                                            edges  |  
                                            time_count_env, maxIter=200, nsamp=3000,
                                          verbose=TRUE )
summary(Model_C20)


# save.image(file="boilerplate_replication.RData")

